****************************************************************************
****************************************************************************
***********RESEARCH PROJECT: OFFSHORING AND IMMIGRATION*********************
****************************************************************************
***************************AUTHOR :*****************************************
*****************************Dario P.****************************
****************************************************************************



local stdate $S_DATE
local sttime $S_TIME

clear

capture log close



global prgrms "DO\"
global logs "LOG\"
global data "DARIO\DATA\"
global tables "TABLE\"
global graph "GRAPH\"



set mem 5g

set matsize 800

set more off


cd "G:\data\workdata\707562"



*******idapall3 is the matched employer-employee data constructed by using the raw data from statistics denmark (see SAS programme: Program_to_create_stata_data)

use ${data}idapall3a.dta, clear


***merge with info on country of origin
sort pnr year
merge pnr year using ${data}ie_type.dta
keep if _merge==3
drop _merge

***merge with info on occupational level
sort pnr year
merge pnr year using ${data}pstill.dta
keep if _merge==3
drop _merge
rename ie_type  ietype


***merge with education
sort hfaudd
merge hfaudd using ${data}udd.dta
drop _merge

gen edu=substr(hffsp,1,2)
destring edu, replace

gen skill1=(edu==20 | edu==25 | edu==35| edu==40 | edu==15)
gen skill2=(edu==50 | edu==60 | edu==65 | edu==70)
gen skill3=(skill1==0 & skill2==0) & edu!=.
replace skill3=1 if edu==.

gen years_of_edu=0
replace years_of_edu=6 if edu==10
replace years_of_edu=10 if edu==15 | edu==35
replace years_of_edu=12 if edu==20 | edu==25 | edu==39
replace years_of_edu=14 if edu==40 
replace years_of_edu=15 if edu==50 | edu==60
replace years_of_edu=17 if edu==65
replace years_of_edu=20 if edu==70
replace years_of_edu=6 if edu==.


gen bc=(manager==0 & ml==0)

replace work_exp=work_exp/1000
rename opr_land ieland



keep hffsp tlonkval timelon pstill edu age  skill1 skill2 skill3 years_of_edu   ml  manager bc male  cvrnr lbnr pnr year  work_exp tenure ieland ietype foreigner_tot  lonind akom 


destring akom, replace

recode akom (171=240) (181=230) (189=190) (205=230) (207=190) (208=210) (209=250) (211=260) (213=270) (215=270) (221=260) (225=250) /*
*/ (227=210)  (229=250) (231=219)  (235=240) (237=240) (251=350) (255=265) (257=350) (261=350) /*
*/ (263=265) (267=259) (271=336) (301=326) (303=340) (305=306) (307=370) (309=326) (311=330) (313=320) (315=316) (317=326) (319=326) /*
*/ (321=316) (323=326) (325=330) (327=306) (331=330) (333=330) (335=340) (337=340) (339=316)  (341=316) (343=306) /*
*/ (345=316) (351=320) (353=370) (355=360) (357=370) (359=360) (361=390) (363=360) (365=390) (367=360) (369=376) /*
*/ (371=376) (373=370) (375=376) (377=390) (379=360) (381=360) (383=360) (385=320) (387=376) (389=336) (391=376) (393=370) (395=376) /*
*/ (397=390) (401=400) (403=400) (405=400) (407=400) (409=400) (421=420) (423=480) (425=430) (427=479) (429=410) (431=430) (433=420) (435=479) (433=420) (435=479) (437=420) (439=440) /*
*/ (441=440) (443=492) (445=410) (447=440) (449=450) (451=410) (471=480) (473=430) (475=482) (477=430) (481=482) (483=480) (485=420) /*
*/ (487=482) (489=450) (491=420) (493=492) (495=450) (497=430) (499=420) (501=540) (503=580) (505=550) (507=540)  /*
*/ (511=510) (513=540) (515=510)  (517=550) (519=580) (521=550) (523=540) (527=575) (529=580) /*
*/ (531=550) (533=540) (535=540) (537=540) (539=580) (541=550) (543=510) (545=580) (551=530) (553=573) (555=573) (557=561) (559=575) (565=530) /*
*/ (567=561) (567=573) (569=575) (571=561) (577=573)  (603=630)  (609=615) (611=630) (613=766) (617=630) /*
*/ (619=766) (623=621) (625=756)  (629=621) (631=630) (651=657) (653=756) (655=760) (657=760) (659=760) (663=756) (667=760) (669=760) /*
*/ (673=665) (675=671) (677=657) (679=661) (681=760) (683=661) (685=657) (701=706) (703=746) (705=740) (709=710) (711=710) (713=710) (715=746) /*
*/  (721=706) (723=730) (725=707) (729=730) (731=730) (733=706) (735=707) (737=746) (739=706) (743=740) /*
*/ (745=746)  (749=740) (761=791) (763=791) (765=787) (767=710) (769=791) (771=740) (775=791) (777=779) (781=779) /*
*/ (783=779) (785=787)  (789=791)  (801=846) (803=849) (805=810) (807=810) (809=820) (811=849) (815=846) (817=851) /*
*/ (819=860) (821=860) (823=846) (827=820) (829=860) (831=851)  (835=849) (837=851) (839=860)  (841=813) (843=840) (845=840) (847=813) /*
*/ (861=820) if year<2006

**** 11 old municipalities (233, 509, 525, 601, 605, 627, 717, 719, 747, 793, 833)
***** were split into more municipalities: recode by exploiting the fact that dataset firm contains from 1998 on both the old and the new municipality kode, named
*****respectively GF_KOM_KODE and GF_KOM_KODE_2

destring cvrnr, replace
sort cvrnr year 
merge cvrnr year using ${data}akom2.dta 
drop _merge

bysort cvrnr: egen kom_ny=min( GF_KOM_KODE_2)

replace akom=kom_ny if (akom==233 | akom== 509 | akom==525 | akom==601 | akom== 605 | akom==627 | akom==717 | akom== 719 | akom==747 | akom==793 | akom== 833)


recode akom (147=101)
***drop those firms for which the municipality code cannot be identified

drop if akom==.



gen natives=(ietype=="1") | ieland=="5126" | ieland=="5130" | ieland=="5180" | ieland=="5134" | ieland=="5142" | ieland=="5150" | ieland=="5108" | ieland=="5140" | /*
*/ ieland=="5156" | ieland=="5164" | ieland=="5170"  | ieland=="5182" | ieland=="5104" | ieland=="5120" 


****several definitions of foreign workers
gen foreigner_nonEU=(natives!=1)
gen foreigner_nonEU_ls=(foreigner_nonEU==1  & (bc==1))
gen foreigner_tot_ls_1=(ietype!="1" & (skill2!=1))
gen foreigner_tot_ls_2=(ietype!="1" & bc==1)

gen foreigner_tot=(ietype!="1")
gen foreigner_EU=(ieland=="5126" | ieland=="5130" | ieland=="5180" | ieland=="5134" | ieland=="5142" | ieland=="5150" | ieland=="5108" | ieland=="5140" | /*
*/ ieland=="5156" | ieland=="5164" | ieland=="5170"  | ieland=="5182" | ieland=="5104" | ieland=="5120")


preserve
***merge the worker level data with the final sample of firms used in the main analysis to calculate the descriptive stats for the workforce employed in the final sample of firms
sort cvrnr year
merge cvrnr year using ${data}final_sample.dta
keep if _merge==3
capture log close
log using ${logs}table1.log, replace
gen manager2=manager==1 | ml==1
sum age skill1 skill2  skill3 years_of_edu  manager2 bc if natives==1 
bysort foreigner_nonEU: sum age skill1 skill2  skill3 years_of_edu  manager2 bc
bysort foreigner_tot: sum age skill1 skill2  skill3 years_of_edu manager2 bc
bysort foreigner_EU: sum age skill1 skill2  skill3 years_of_edu manager2  bc
keep pnr year
sort pnr year
save ${data}final_sample2.dta, replace
log close
restore

****construct workforce composition variables
preserve
xtile age_q= age, nq(5)

tab age_q, gen(a)

gen deflator=0
replace deflator=0.86 if year==1993
replace deflator=0.87 if year==1994
replace deflator=0.89 if year==1995
replace deflator=0.91 if year==1996
replace deflator=0.93 if year==1997
replace deflator=0.95 if year==1998
replace deflator=0.97 if year==1999
replace deflator=1 if year==2000
replace deflator=1.02 if year==2001
replace deflator=1.05 if year==2002
replace deflator=1.07 if year==2003
replace deflator=1.08 if year==2004
replace deflator=1.10 if year==2005
replace deflator=1.12 if year==2006
replace deflator=1.14 if year==2007
replace deflator=1.18 if year==2008
replace deflator=1.2 if year==2009
replace deflator=1.22 if year==2010
replace deflator=1.26 if year==2011
replace deflator=1.29 if year==2012

replace lonind=lonind/deflator


bysort cvrnr year: egen akom_m=mode(akom), maxmode missing

drop akom

rename akom_m akom

bysort akom year: gen total_emp_akom=_N

***calculate workforce composition chateracteristics at the firms levell
collapse (mean) skill1 skill2 skill3 years_of_edu male age manager ml bc work_exp total_emp_akom tenure foreigner_tot foreigner_EU foreigner_nonEU foreigner_nonEU_ls foreigner_tot_ls_1 foreigner_tot_ls_2 lonind akom, by(cvrnr year) cw

sort cvrnr year

save ${data}demographic.dta, replace
restore


***calculate immigration variables at the municipality level (for the main analysis and for the robustness checks)

preserve
collapse (mean)  foreigner_nonEU_mun=foreigner_nonEU foreigner_nonEU_ls_mun=foreigner_nonEU_ls foreigner_tot_ls_1_mun=foreigner_tot_ls_1 /*
*/ foreigner_tot_ls_2_mun=foreigner_tot_ls_2 foreigner_tot_mun=foreigner_tot  foreigner_EU_mun= foreigner_EU, by(akom year) cw
sort akom year
save ${data}for_non_eu.dta, replace
restore

preserve
replace foreigner_nonEU=0 if (ieland=="5129" | ieland=="5607" | ieland=="5174" | ieland=="5609" | /*
*/  ieland=="5611" | ieland=="5154" | ieland=="5778" | ieland=="5752" | ieland=="5422" | ieland=="5153" | ieland=="5158" | ieland=="5128")
collapse (mean)  foreigner_nonEU_mun=foreigner_nonEU, by(akom year) cw
sort akom year
save ${data}for_non_eu_nneu.dta, replace
restore


preserve
bysort akom year: egen foreigner_nonEU_mun=total(foreigner_nonEU)
collapse   foreigner_nonEU_mun , by(akom year) cw
sum foreigner_nonEU_mun
sort akom year
save ${data}for_non_eu_count.dta, replace
restore


***REFUGEES AND NEW EU COUNTRIES
preserve
drop foreigner_nonEU
destring ieland, replace

tostring ieland, replace

gen foreigner_nonEU= (ieland=="5436"  | ieland=="5438" | ieland=="5289" | ieland=="5488" | ieland=="5418" | ieland=="5404" | ieland=="5152" | ieland=="5456")
replace foreigner_nonEU=1 if (ieland=="5129"  | ieland=="5607" | ieland=="5174" | ieland=="5609" /*
*/  | ieland=="5611" | ieland=="5154" | ieland=="5778" | ieland=="5752" | ieland=="5422" | ieland=="5153" | ieland=="5128" | ieland=="5158")

collapse (mean)  foreigner_nonEU_mun=foreigner_nonEU, by(akom year) cw
sort akom year
save ${data}for_non_eu_ref.dta, replace
restore


 
preserve
drop foreigner_nonEU
destring ieland, replace

tostring ieland, replace

gen foreigner_nonEU= (ieland=="5436"  | ieland=="5438" | ieland=="5289" | ieland=="5488" | ieland=="5418" | ieland=="5404" | ieland=="5152" | ieland=="5456")

collapse (mean)  foreigner_nonEU_mun=foreigner_nonEU, by(akom year) cw
sort akom year
save ${data}for_non_eu_ref2.dta, replace
restore




****WAGE REGRESSIONS AT THE INDIVIDUAL LEVEL
gen deflator=0
replace deflator=0.86 if year==1993
replace deflator=0.87 if year==1994
replace deflator=0.89 if year==1995
replace deflator=0.91 if year==1996
replace deflator=0.93 if year==1997
replace deflator=0.95 if year==1998
replace deflator=0.97 if year==1999
replace deflator=1 if year==2000
replace deflator=1.02 if year==2001
replace deflator=1.05 if year==2002
replace deflator=1.07 if year==2003
replace deflator=1.08 if year==2004
replace deflator=1.10 if year==2005
replace deflator=1.12 if year==2006
replace deflator=1.14 if year==2007
replace deflator=1.18 if year==2008
replace deflator=1.2 if year==2009
replace deflator=1.22 if year==2010
replace deflator=1.26 if year==2011
replace deflator=1.29 if year==2012


gen lonind2=((lonind/deflator))


drop if tlonkval>50
replace lonind=((timelon/deflator))

gen age2=(age)^2
gen work_exp2=(work_exp)^2

capture program drop meany
program define meany, rclass
args var spec
sum `var' if e(sample), mean
estadd scalar Meany=r(mean): `spec'
end 


preserve
keep if foreigner_nonEU==0 & foreigner_EU==0
collapse lonind, by(akom year)
sort akom year
save ${data}wage_natives.dta, replace
restore


preserve
keep if  skill2==0 & foreigner_nonEU==0 & foreigner_EU==0
collapse lonind, by(akom year)
sort akom year
save ${data}wage_natives_ls.dta, replace
restore


preserve
keep if  skill2==0 & foreigner_nonEU==1 
collapse lonind, by(akom year)
sort akom year
save ${data}wage_nonEU_ls.dta, replace
restore

preserve
collapse foreigner_nonEU_ls foreigner_nonEU lonind, by(akom year)
sort akom year
save ${data}wage_for.dta, replace
restore

replace lonind=ln(lonind+1)



destring pnr, replace

sort cvrnr year
merge cvrnr year using ${data}final_sample.dta
keep if _merge==3


reghdfe lonind foreigner_nonEU foreigner_EU age age2 male  skill1 skill2  manager ml, absorb(year akom cvrnr branche2) 
estimates store m4
meany lonind m4

esttab  m4  using ${tables}Table6_7.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany r2 N, fmt( %9.3f %9.3f  %9.0fc) ///
labels("Mean Y"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep(foreigner_nonEU foreigner_EU )
eststo clear





****calcualte the immigration variable by using the whole population instead of just focusing on workers

use "G:\Data\Workdata\703989\STATA FILES\idapall5.dta" if aar>="1995" & aar<="2011", clear
rename aar year
destring year, replace


sort hfaudd

merge hfaudd using ${data}udd.dta
keep if _merge==3
drop _merge

gen edu=substr(hffsp,1,2)
destring edu, replace

gen skill1=(edu==20 | edu==25 | edu==35| edu==40)
gen skill2=(edu==50 | edu==60 | edu==65 | edu==70)
gen skill3=(skill1==0 & skill2==0)


rename kom akom

rename ie_type ietype
rename opr_land ieland

keep year cvrnr akom ietype ieland skill3


destring akom, replace

recode akom (171=240) (181=230) (189=190) (205=230) (207=190) (208=210) (209=250) (211=260) (213=270) (215=270) (221=260) (225=250) /*
*/ (227=210)  (229=250) (231=219)  (235=240) (237=240) (251=350) (255=265) (257=350) (261=350) /*
*/ (263=265) (267=259) (271=336) (301=326) (303=340) (305=306) (307=370) (309=326) (311=330) (313=320) (315=316) (317=326) (319=326) /*
*/ (321=316) (323=326) (325=330) (327=306) (331=330) (333=330) (335=340) (337=340) (339=316)  (341=316) (343=306) /*
*/ (345=316) (351=320) (353=370) (355=360) (357=370) (359=360) (361=390) (363=360) (365=390) (367=360) (369=376) /*
*/ (371=376) (373=370) (375=376) (377=390) (379=360) (381=360) (383=360) (385=320) (387=376) (389=336) (391=376) (393=370) (395=376) /*
*/ (397=390) (401=400) (403=400) (405=400) (407=400) (409=400) (421=420) (423=480) (425=430) (427=479) (429=410) (431=430) (433=420) (435=479) (433=420) (435=479) (437=420) (439=440) /*
*/ (441=440) (443=492) (445=410) (447=440) (449=450) (451=410) (471=480) (473=430) (475=482) (477=430) (481=482) (483=480) (485=420) /*
*/ (487=482) (489=450) (491=420) (493=492) (495=450) (497=430) (499=420) (501=540) (503=580) (505=550) (507=540)  /*
*/ (511=510) (513=540) (515=510)  (517=550) (519=580) (521=550) (523=540) (527=575) (529=580) /*
*/ (531=550) (533=540) (535=540) (537=540) (539=580) (541=550) (543=510) (545=580) (551=530) (553=573) (555=573) (557=561) (559=575) (565=530) /*
*/ (567=561) (567=573) (569=575) (571=561) (577=573)  (603=630)  (609=615) (611=630) (613=766) (617=630) /*
*/ (619=766) (623=621) (625=756)  (629=621) (631=630) (651=657) (653=756) (655=760) (657=760) (659=760) (663=756) (667=760) (669=760) /*
*/ (673=665) (675=671) (677=657) (679=661) (681=760) (683=661) (685=657) (701=706) (703=746) (705=740) (709=710) (711=710) (713=710) (715=746) /*
*/  (721=706) (723=730) (725=707) (729=730) (731=730) (733=706) (735=707) (737=746) (739=706) (743=740) /*
*/ (745=746)  (749=740) (761=791) (763=791) (765=787) (767=710) (769=791) (771=740) (775=791) (777=779) (781=779) /*
*/ (783=779) (785=787)  (789=791)  (801=846) (803=849) (805=810) (807=810) (809=820) (811=849) (815=846) (817=851) /*
*/ (819=860) (821=860) (823=846) (827=820) (829=860) (831=851)  (835=849) (837=851) (839=860)  (841=813) (843=840) (845=840) (847=813) /*
*/ (861=820) if year<2006

**** 11 old municipalities (233, 509, 525, 601, 605, 627, 717, 719, 747, 793, 833)
***** were split into more municipalities: recode by exploiting the fact that dataset firm contains from 1998 on both the old and the new municipality kode, named
*****respectively GF_KOM_KODE and GF_KOM_KODE_2

destring cvrnr, replace
sort cvrnr year 
merge cvrnr year using ${data}akom2.dta 
drop _merge

bysort cvrnr: egen kom_ny=min( GF_KOM_KODE_2)

replace akom=kom_ny if (akom==233 | akom== 509 | akom==525 | akom==601 | akom== 605 | akom==627 | akom==717 | akom== 719 | akom==747 | akom==793 | akom== 833)


recode akom (147=101)
***drop those firms for which the municipality code cannot be identified

drop if akom==.

gen natives=(ietype=="1") | ieland=="5126" | ieland=="5130" | ieland=="5180" | ieland=="5134" | ieland=="5142" | ieland=="5150" | ieland=="5108" | ieland=="5140" | /*
*/ ieland=="5156" | ieland=="5164" | ieland=="5170"   | ieland=="5182" | ieland=="5104" | ieland=="5120"

gen foreigner_nonEU=(natives!=1)

collapse (mean)  foreigner_nonEU_mun=foreigner_nonEU, by(akom year) cw

sort akom year

save ${data}for_non_eu_pop.dta, replace




****TRADE AND FIRM-LEVEL DATA**************************************************


****TRADE AND FIRM-LEVEL DATA**************************************************


****TRADE AND FIRM-LEVEL DATA**************************************************


****open import_ind_c which is constructed by running the SAS pgrm "Trade_data"

use ${data}import_ind_c.dta, replace

****keep if year>=1995 as we don't have firm-level accounting before 

keep if year>=1995

replace vrd_imp=0 if vrd_imp==.

rename HS4_imp HS6_imp
rename HS4_exp HS6_exp
rename HS4_dom_1 HS6_dom_1

***construct a dummy for narrow offshoring (firms' sales (incl. domestic) or exports in the same 6 digit product as in imports)
gen narrow= (HS6_imp==HS6_exp) 
replace narrow=1 if  (HS6_imp==HS6_dom_1) 

****gen alternative def for placebo test in Table 9

gen narrow2= (HS6_imp!=HS6_exp) 
replace narrow2=1 if  (HS6_imp!=HS6_dom_1) 
***gen broad offshoring (aggregated over all destinations)

bysort cvrnr year: egen broad_off=total(vrd_imp)
replace broad_off=0 if broad_off==.


***gen narrow offshoring (aggregated over all destinations)

bysort cvrnr year: egen narrow_off=total(vrd_imp) if narrow==1
replace narrow_off=0 if narrow_off==.

bysort cvrnr year: egen narrow_off2=total(vrd_imp) if narrow2==1
replace narrow_off2=0 if narrow_off2==.
 
preserve
drop if land=="CZ" | land=="EE" | land=="LV" | land=="HU" | land=="LT" | land=="PL" | land=="SK" | land=="SI" | land=="CY" | land=="MT" | land=="BG" | land=="RO"
collapse broad_off narrow_off narrow_off2, by(cvrnr year)
destring cvrnr, replace
sort cvrnr year
save ${data}import_ind_d_no_neu.dta, replace
restore


collapse broad_off narrow_off narrow_off2, by(cvrnr year)
sort cvrnr year
save ${data}import_ind_d.dta, replace



*DO THE SAME FOR THE DESTINATION SPECIFIC DATASET

*DO THE SAME FOR THE DESTINATION SPECIFIC DATASET

*DO THE SAME FOR THE DESTINATION SPECIFIC DATASET

*DO THE SAME FOR THE DESTINATION SPECIFIC DATASET

*DO THE SAME FOR THE DESTINATION SPECIFIC DATASET



***open import_ind_c which is constructed by running the SAS pgrm "Trade_data"

use ${data}import_ind_c.dta, replace

****keep if year>=1995 as we don't have firm-level accounting before 

keep if year>=1995 


replace vrd_imp=0 if vrd_imp==.

rename HS4_imp HS6_imp
rename HS4_exp HS6_exp
rename HS4_dom_1 HS6_dom_1

***construct a dummy for narrow offshoring (firm's sales (incl. domestic) or exports in the same 6 digit product as in imports)
gen narrow= (HS6_imp==HS6_exp) 
replace narrow=1 if  (HS6_imp==HS6_dom_1) 



***gen broad offshoring (disaggregated over all destinations)
***gen broad offshoring (disaggregated over all destinations)	
replace land="FR" if land=="MC"	
replace land="FR" if land=="GF"
replace land="FR" if land=="YT"
replace land="FR" if land=="MQ"
replace land="FR" if land=="RE"
replace land="FR" if land=="GP"
replace land="BE" if land=="LU"
replace land="RS" if land=="ME"
replace land="RS" if land=="HR"
replace land="RS" if land=="SI"
replace land="RS" if land=="MK"
replace land="RS" if land=="CS"
replace land="RS" if land=="XS"
replace land="RS" if land=="XK"
replace land="RS" if land=="BA"
replace land="RS" if land=="YU"
replace land="ZR" if land=="CD"
replace land="ES" if land=="AD"
replace land="ES" if land=="GI"
replace land="ES" if land=="EA"
replace land="ES" if land=="IC"
replace land="ES" if land=="XC"
replace land="ES" if land=="XL"
replace land="GB" if land=="UK"
replace land="US" if land=="UM"
replace land="US" if land=="PR"
replace land="US" if land=="VI"
replace land="US" if land=="AS"
replace land="IT" if land=="VA"
replace land="IT" if land=="SM"
replace land="MA" if land=="EH"
replace land="CH" if land=="LI"
replace land="NO" if land=="SJ"
replace land="AR" if land=="FK"
replace land="TP" if land=="TL"

****drop unknown destinations, uncertain destinations
***unknown destinations
drop if land=="QU" | land=="QV" | land=="QW" | land=="QX"
***antartic
drop if land=="AQ"
***polar area
drop if land=="XR"
***bonventøen (polar area)
drop if land=="BV"
***sandwich islands (polar area)
drop if land=="GS"
***british india ocean territory
drop if land=="IO"
***french occupied as polar area
drop if land=="TF"
***american oceanian territory 
drop if land=="XA"
***australian oceanian territory
drop if land=="XO"


****recode some to make compatible with the officals iso from country_codes dataset
***GAZA
replace land="IL" if land=="XP"
***recode canarian islands as spain
replace land="ES" if land=="XB"
***recode montenegro as serbia
replace land="RS" if land=="XM"
***recode guam as USA
replace land="US" if land=="GU"
***heard island AS AUSTRALIA
replace land="AU" if land=="HM"
*** NEW ZEALAND islands 
replace land="NZ" if land=="XZ"

bysort cvrnr land year: egen broad_off_dis=total(vrd_imp)
replace broad_off_dis=0 if broad_off_dis==.


***gen narrow offshoring (disaggregated over all destinations)

bysort cvrnr land year: egen narrow_off_dis=total(vrd_imp) if narrow==1
replace narrow_off_dis=0 if narrow_off_dis==.


collapse broad_off_dis narrow_off_dis, by(cvrnr land year)
sort cvrnr year
drop if year==2013
save ${data}import_ind_e.dta, replace



****merge with the country official classification

use ${data}import_ind_e.dta, clear

drop if land==""


******gen a fake cvrnr with all possible destinations attached and append to the dataset
preserve
gen check=19999999999999999
collapse check, by(land)
rename check cvrnr2
gen year=.
sort land 
drop if land==""
gen id_dest2=_n
count
save ${data}fake_cvrnr.dta, replace
restore




***create a balanced panel firm*destination for each year and then append to get an unbalanced firm*destination*year dataset

forvalues i=1995(1)2011{
preserve
keep if year==`i'

append using ${data}fake_cvrnr.dta
replace year=`i' if year==.
sort land  id_dest2
encode land, gen(land2)
drop land
rename land2 land
bysort cvrnr land: gen id=_n
keep if id==1
drop id

destring cvrnr, replace
tsset cvrnr land
****expand the dataset such that each firm has all possible destinations
tsfill, full

replace broad_off_dis=0 if broad_off_dis==.
replace narrow_off_dis=0 if narrow_off_dis==.


replace year=`i' if year==.
bysort cvrnr land: gen id=_n
keep if id==1
drop id
drop  id_dest2
save ${data}import_ind_e`i'.dta, replace
restore
}


*****gen the final dataset country-specific
use ${data}import_ind_e1995.dta, clear


forvalues i=1996(1)2011{
append using ${data}import_ind_e`i'.dta
}

save ${data}import_ind_e2_will.dta, replace


use  ${data}import_ind_e2_will.dta, clear
keep if broad_off_dis>0
keep cvrnr year
bysort cvrnr year: gen destinations=_N 
replace destinations=destinations-1
collapse destinations, by(cvrnr year)
sort cvrnr year
save ${data}desti_will.dta, replace





****let's start with the aggregated over all destinations analysis
****let's start with the aggregated over all destinations analysis
****let's start with the aggregated over all destinations analysis
****let's start with the aggregated over all destinations analysis
****let's start with the aggregated over all destinations analysis



use ${data}import_ind_d.dta, clear

***merge with the accountings

destring cvrnr, replace

sort cvrnr year

merge cvrnr year using ${data}industry.dta
drop if _merge==1
drop _merge

keep if year>=1995 & year<=2011


sort cvrnr year
merge cvrnr year using ${data}foreign_ownership.dta
keep if _merge==3
drop _merge
destring GF_VIRKFKOD, replace
gen foreign=GF_VIRKFKOD==160 | GF_VIRKFKOD==170 |  GF_VIRKFKOD==180 | GF_VIRKFKOD==190 | GF_VIRKFKOD==196 | GF_VIRKFKOD==200 | GF_VIRKFKOD==210 | GF_VIRKFKOD==220 | GF_VIRKFKOD==290 | GF_VIRKFKOD==291
sort cvrnr year


sort cvrnr year
merge cvrnr year using ${data}antalarb.dta
keep if _merge==3
drop _merge


gen  multi=antalarb>1


***gen useful firm-level controls: productivity, size etc...
gen deflator=0
replace deflator=0.86 if year==1993
replace deflator=0.87 if year==1994
replace deflator=0.89 if year==1995
replace deflator=0.91 if year==1996
replace deflator=0.93 if year==1997
replace deflator=0.95 if year==1998
replace deflator=0.97 if year==1999
replace deflator=1 if year==2000
replace deflator=1.02 if year==2001
replace deflator=1.05 if year==2002
replace deflator=1.07 if year==2003
replace deflator=1.08 if year==2004
replace deflator=1.10 if year==2005
replace deflator=1.12 if year==2006
replace deflator=1.14 if year==2007
replace deflator=1.18 if year==2008
replace deflator=1.2 if year==2009
replace deflator=1.22 if year==2010
replace deflator=1.26 if year==2011
replace deflator=1.29 if year==2012






destring GF_BRANCHE_07, gen(bra07)
format bra07 %06.0f
replace bra07=-999 if bra07==.
bysort GF_BRANCHE_03 cvrnr: egen bra07_max=max(bra07)
replace bra07=bra07_max if bra07==-999
replace bra07=. if bra07==-999


gen branche=bra07

tostring branche, gen(branche1)
gen branche2=substr(branche1, 1,2)

****calculate industry classification 1-digit 2007

gen bra=.

***manufacturing
replace bra=1 if branche2>="10" & branche2<="33"
***electricity, water
replace bra=2 if branche2=="35" & branche2<="39"
***construction
replace bra=3 if branche2>="41" & branche2<="43"
***wholesale and retail trade
replace bra=4 if branche2>="45" & branche2<="47"
***transport
replace bra=5 if branche2>="49" & branche2<="56"
***information and comunication
replace bra=6 if branche2>="58" & branche2<="63"
***financial services & real estate & business service
replace bra=7 if branche2>="64" & branche2<="75"
***other services
replace bra=8 if branche2>="77" & branche2<="97"


gen sales=GF_OMS


drop if sales<0
drop if GF_EGUL<0

replace sales=sales/deflator
replace GF_EGUL=GF_EGUL/deflator
replace  GF_E_GUL= GF_E_GUL/deflator



****merge with firm size: with dataset has been created by globid from: G:\Data\Workdata\703989\STATA FILES\idfi
sort cvrnr year

merge cvrnr year using ${data}firm_size2.dta
keep if _merge==3 

drop _merge

gen tot_emp=fansh

gen size1=(tot_emp>=1 & tot_emp<20)
gen size2=(tot_emp>=20 & tot_emp<50)
gen size3=(tot_emp>=50)


gen productivity=ln(sales/tot_emp)
gen capital_int=.
replace capital_int=ln(GF_EGUL/tot_emp) if year>=1999
replace capital_int=ln(GF_E_GUL/tot_emp) if year<1999




****merge with workforce characteristicts retrived from the matched employer-employee database
sort cvrnr year

merge cvrnr year using ${data}demographic.dta

keep if _merge==3

drop _merge



***MERGE WITH MUNICIPALITY LEVEL SHARE OF NON-EU FOREIGNERS
sort akom year

merge akom year using ${data}for_non_eu.dta

keep if _merge==3

drop _merge

***MERGE WITH ITS INSTRUMENT 

sort akom year

merge akom year using ${data}iv_off5.dta

keep if _merge==3

drop _merge

sum s_hat, det

corr s_hat foreigner_nonEU_mun


***gen intensive and extensive margins of offshoring

replace broad_off=0 if broad_off==.
replace narrow_off=0 if narrow_off==.

gen ext_broad_off=(broad_off>0)
gen ext_narrow_off=(narrow_off>0)

gen int_broad_off=ln(broad_off)
gen int_narrow_off=ln(narrow_off)


****drop firms with fewer than 2 employees and whose productivity is missing
drop if branche2=="."
drop if year<1995
drop if year==2012
drop if tot_emp<2
drop if productivity==.
drop if capital_int==.
drop if bra==2


sort cvrnr year
merge cvrnr year using ${data}desti_will.dta
keep if _merge==1 | _merge==3
drop _merge

replace destinations=0 if destinations==.




***drop duplicates
bysort cvrnr year: gen id=_n
keep if id==1

xtset cvrnr year


gen foreign_o=l.foreign
drop foreign
rename foreign_o foreign
gen prod=l.productivity
gen cap=l.capital_int
gen for=l.foreigner_nonEU_mun
gen for_iv=l.s_hat


gen region=0
replace region=1 if akom>=101 & akom<=250
replace region=1 if akom==260 | akom==270 | akom==400
replace region=2 if akom==253 | akom==259 | akom==265 | akom==269
replace region=2 if akom>=306 & akom<=390
replace region=3 if akom>=410 & akom<=580
replace region=3 if akom==607  | akom==621 | akom==630
replace region=4 if akom>=615 & akom<=766 
replace region=4 if akom==779 | akom==791
replace region=5 if akom==773 | akom==787
replace region=5 if akom>=810 & akom<=860


gen manu=(bra==1)

bysort cvrnr: egen manu_max=max(manu)

destring branche2, replace


***drop those firms that move to another akom for the cluster
bysort cvrnr: egen akom_m=max(akom)
gen change=(akom!=akom_m)
drop if change==1

tsset cvrnr year

***lag workeforce composition characteristics

local cntrl "years_of_edu male age  work_exp tenure multi size1 size2 size3"
foreach var in `cntrl'{
gen `var'_lag=l.`var'
drop `var' 
rename `var'_lag `var'
}
save ${data}offshoring_fin_will.dta, replace


use ${data}offshoring_fin_will.dta, clear


****merge with housing prices at the regional level for a robustness check
sort region year
merge region year using ${data}housing_price.dta
keep if _merge==3
drop _merge



****add immigration variable  in levels for a robustness check

sort akom year
merge akom year using ${data}for_non_eu_count.dta
keep if _merge==3

drop _merge

sort akom year
merge akom year using ${data}iv_off_count.dta
keep if _merge==3




global cntrl0 "prod  foreign multi size1 size2 size3"
global cntrl1 "prod cap  foreign multi size1 size2 size3"
global cntrl2 "prod cap years_of_edu male age  work_exp tenure  foreign multi size1 size2 size3"
global cntrl4 "prod cap foreign"




capture program drop meany
program define meany, rclass
args var spec
sum `var' if e(sample), mean
estadd scalar Meany=r(mean): `spec'
end 

tsset cvrnr year

***create immigration variable and its instrument in level (instead of shares) for a robustness check
gen for2=l.foreigner_nonEU_mun_count
gen for2_iv=l.F_hat_no_eu

*****EXTENSIVE MARGINS

capture log close
log using ${logs}Table4_first_stage.log, replace

xi: reg ext_narrow_off for  i.year i.branche2 i.akom, cluster(akom) 
estimates store m0
meany ext_narrow_off m0

preserve
keep if e(sample)
capture log close
log using ${logs}Table2.log, replace
sum ext_narrow_off int_narrow_off  ext_broad_off for foreigner_nonEU_mun_count  male age years_of_edu tenure work_exp prod size1 size2 size3 cap multi foreign 
bysort year: gen tot_firms=_N
sum tot_firms
log close
restore

xi: probit ext_narrow_off for  i.year i.branche2 i.akom, cluster(akom) 
margins, dydx(for) atmeans predict(p) post
estimates store m1
meany ext_narrow_off m1

xi: reghdfe ext_narrow_off for, cluster(akom) absorb(year branche2 akom cvrnr)
estimates store m2
meany ext_narrow_off m2


xi: reghdfe ext_narrow_off for $cntrl2, cluster(akom) absorb(year branche2 akom cvrnr)
estimates store m2a
meany ext_narrow_off m2a


xi: reghdfe ext_narrow_off (for=for_iv), absorb(year branche2 akom) cluster(akom) ffirst stages(first)
estimates store m3
meany ext_narrow_off m3
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace



xi: reghdfe ext_narrow_off (for=for_iv), absorb(year branche2 akom cvrnr) cluster(akom) ffirst stages(first)
estimates store m3a
meany ext_narrow_off m3a
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


xi: reghdfe ext_narrow_off  $cntrl2 (for=for_iv) , absorb(year branche2 akom cvrnr) cluster(akom) ffirst stages(first)
estimates store m4
meany ext_narrow_off m4
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

xi: reghdfe ext_narrow_off $cntrl2 (for2=for2_iv) , absorb(year branche2 akom cvrnr) cluster(akom) ffirst stages(first)
estimates store m5
meany ext_narrow_off m5
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


xi: reghdfe ext_narrow_off $cntrl2 house_price (for=for_iv) , absorb(year branche2 akom cvrnr) cluster(akom) ffirst stages(first)
estimates store m5a
meany ext_narrow_off m5a
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


local cntrl " ext_narrow_off for for_iv prod cap years_of_edu male age  work_exp tenure  foreign multi size1 size2 size3"
foreach var in `cntrl'{
gen `var'_lag=l.`var'
gen `var'd=`var'-`var'_lag
}

ivreg2 ext_narrow_offd  prodd capd years_of_edud maled aged  work_expd tenured  foreignd multid size1d size2d size3d (ford=for_ivd),   cluster(akom) first
estimates store m6
meany ext_narrow_offd m6
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m0 m1 m2 m2a m3 m3a m4 m5 m5a m6 using ${tables}Table4plusrobustness.txt, replace style(tab)  cells(b(star fmt(%9.6f)) se(par fmt(%9.6f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for for2 ford $cntrl4)
eststo clear
log close



*****INTENSIVE MARGINS


capture log close
log using ${logs}Table5_first_stage.log, replace

xi: reg int_narrow_off for  i.year i.branche2  i.akom, cluster(akom) 
estimates store m1
meany int_narrow_off m1

xi: reghdfe int_narrow_off for, cluster(akom)  absorb(year branche2 akom cvrnr)
estimates store m2
meany int_narrow_off m2


xi: reghdfe int_narrow_off for $cntrl2, cluster(akom)  absorb(year branche2 akom cvrnr)
estimates store m2a
meany int_narrow_off m2a

xi: reghdfe int_narrow_off (for=for_iv), absorb(year branche2 akom) cluster(akom) ffirst stages(first)
estimates store m3
meany int_narrow_off m3
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


xi: reghdfe int_narrow_off (for=for_iv), absorb(year branche2 akom cvrnr) cluster(akom) ffirst stages(first)
estimates store m3a
meany int_narrow_off m3a
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


xi: reghdfe int_narrow_off $cntrl2 (for=for_iv), absorb(year branche2 akom cvrnr) cluster(akom) ffirst stages(first)
estimates store m4
meany int_narrow_off m4
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

xi: reghdfe int_narrow_off $cntrl2 house_price (for=for_iv), absorb(year branche2 akom cvrnr) cluster(akom) ffirst stages(first)
estimates store m4a
meany int_narrow_off m4a
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

xi: reghdfe int_narrow_off $cntrl2 (for2=for2_iv), absorb(year branche2 akom cvrnr) cluster(akom) ffirst stages(first)
estimates store m5
meany int_narrow_off m5
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


local cntrl "int_narrow_off  "
foreach var in `cntrl'{
gen `var'_lag=l.`var'
gen `var'd=`var'-`var'_lag
}


ivreg2 int_narrow_offd prodd capd years_of_edud maled aged  work_expd tenured  foreignd multid size1d size2d size3d (ford=for_ivd), cluster(akom) first 
estimates store m6
meany int_narrow_offd m6
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


esttab m1 m2 m2a m3 m3a m4 m4a m5 m6  using ${tables}Table5plusrobustness.txt, replace style(tab)  cells(b(star fmt(%9.6f)) se(par fmt(%9.6f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for for2 ford $cntrl4)
eststo clear
log close


****MECHANISM: DOES THE SHARE OF NON EU INCREASE THE SHARE OF NON ROUTINE OCCUP. AT THE FIRM LEVEL
drop _merge

sort cvrnr year

merge cvrnr year using ${data}firm_rou.dta
keep if _merge==3
drop _merge



bysort cvrnr: egen max_off=max(ext_narrow_off)


xi: reghdfe routine_job $cntrl2 (for=for_iv) if max_off==1, absorb(year branche2  cvrnr) cluster(akom)  ffirst stages(first)
estimates store m1
meany routine_job m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

xi: reghdfe routine_job $cntrl2 (for=for_iv) if max_off==0, absorb(year branche2  cvrnr) cluster(akom) ffirst stages(first)
estimates store m2
meany routine_job m2
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace



esttab m1 m2 using ${tables}Table6_3_4.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear


*****ROBUSTNESS CHECKS *****ROBUSTNESS CHECKS *****ROBUSTNESS CHECKS*****ROBUSTNESS CHECKS*****ROBUSTNESS CHECKS

*****ROBUSTNESS CHECKS *****ROBUSTNESS CHECKS *****ROBUSTNESS CHECKS*****ROBUSTNESS CHECKS*****ROBUSTNESS CHECKS

*****ROBUSTNESS CHECKS *****ROBUSTNESS CHECKS *****ROBUSTNESS CHECKS*****ROBUSTNESS CHECKS*****ROBUSTNESS CHECKS

*****ROBUSTNESS CHECKS *****ROBUSTNESS CHECKS *****ROBUSTNESS CHECKS*****ROBUSTNESS CHECKS*****ROBUSTNESS CHECKS


****NO FIRMS WITH FEWER THAN 50 EMPLOYEES
use ${data}offshoring_fin_will.dta, clear

capture log close
log using ${logs}Table4_no_small_firms_first_stage.log, replace

xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) if size3==1, absorb(year branche2 cvrnr) cluster(akom) ffirst stages(first)
estimates store m4
meany ext_narrow_off m4
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab  m4  using ${tables}Table4_nosmallfirms.txt, replace style(tab)  cells(b(star fmt(%9.6f)) se(par fmt(%9.6f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear
log close


****NO IMPUTED

preserve
drop if jkod=="R"

capture log close
log using ${logs}Table4_no_imputed_first_stage.log, replace

xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 cvrnr) cluster(akom) ffirst stages(first)
estimates store m4
meany ext_narrow_off m4
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab  m4  using ${tables}Table4_noimputed.txt, replace style(tab)  cells(b(star fmt(%9.6f)) se(par fmt(%9.6f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear
log close

restore

gen for_sq=(for)^2
gen for_iv_sq=(for_iv)^2

capture log close
log using ${logs}Table4_nonlinear_first_stage.log, replace

xi: reghdfe ext_narrow_off $cntrl2 (for for_sq=for_iv for_iv_sq) , absorb(year branche2 cvrnr) cluster(akom) ffirst stages(first)
estimates store m4
meany ext_narrow_off m4
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab  m4  using ${tables}Table4_nonlinear.txt, replace style(tab)  cells(b(star fmt(%9.6f)) se(par fmt(%9.6f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for for_sq)
eststo clear
log close



****SPECIFICATION WITH LONGER LAGS 
****SPECIFICATION WITH LONGER LAGS 
****SPECIFICATION WITH LONGER LAGS 
****SPECIFICATION WITH LONGER LAGS 

tsset cvrnr year
gen for_lag2=l.for
gen for_lag3=l2.for
gen for_iv_lag2=l.for_iv
gen for_iv_lag3=l2.for_iv


capture log close
log using ${logs}Table4_lags_first_stage.log, replace
xi: reghdfe ext_narrow_off $cntrl2 (for_lag2=for_iv_lag2) , absorb(year branche2 cvrnr) cluster(akom) ffirst stages(first)
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

xi: reghdfe ext_narrow_off $cntrl2 (for_lag3=for_iv_lag3) , absorb(year branche2 cvrnr) cluster(akom) ffirst stages(first)
estimates store m2
meany ext_narrow_off m2
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab  m1 m2  using ${tables}Table4_additional_lag.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for_lag2 for_lag3)
eststo clear
log close




****WITH FIRMS THAT CHANGE MUNICIPALITY

preserve

use ${data}import_ind_d.dta, clear

***merge with the accountings

destring cvrnr, replace

sort cvrnr year

merge cvrnr year using ${data}industry.dta
drop if _merge==1
drop _merge

keep if year>=1995 & year<=2011


sort cvrnr year
merge cvrnr year using ${data}foreign_ownership.dta
keep if _merge==3
drop _merge
destring GF_VIRKFKOD, replace
gen foreign=GF_VIRKFKOD==160 | GF_VIRKFKOD==170 |  GF_VIRKFKOD==180 | GF_VIRKFKOD==190 | GF_VIRKFKOD==196 | GF_VIRKFKOD==200 | GF_VIRKFKOD==210 | GF_VIRKFKOD==220 | GF_VIRKFKOD==290 | GF_VIRKFKOD==291
sort cvrnr year


sort cvrnr year
merge cvrnr year using ${data}antalarb.dta
keep if _merge==3
drop _merge


gen  multi=antalarb>1


***gen useful firm-level controls: productivity, size etc...
gen deflator=0
replace deflator=0.86 if year==1993
replace deflator=0.87 if year==1994
replace deflator=0.89 if year==1995
replace deflator=0.91 if year==1996
replace deflator=0.93 if year==1997
replace deflator=0.95 if year==1998
replace deflator=0.97 if year==1999
replace deflator=1 if year==2000
replace deflator=1.02 if year==2001
replace deflator=1.05 if year==2002
replace deflator=1.07 if year==2003
replace deflator=1.08 if year==2004
replace deflator=1.10 if year==2005
replace deflator=1.12 if year==2006
replace deflator=1.14 if year==2007
replace deflator=1.18 if year==2008
replace deflator=1.2 if year==2009
replace deflator=1.22 if year==2010
replace deflator=1.26 if year==2011
replace deflator=1.29 if year==2012






destring GF_BRANCHE_07, gen(bra07)
format bra07 %06.0f
replace bra07=-999 if bra07==.
bysort GF_BRANCHE_03 cvrnr: egen bra07_max=max(bra07)
replace bra07=bra07_max if bra07==-999
replace bra07=. if bra07==-999


gen branche=bra07

tostring branche, gen(branche1)
gen branche2=substr(branche1, 1,2)

****calculate industry classification 1-digit 2007

gen bra=.

***manufacturing
replace bra=1 if branche2>="10" & branche2<="33"
***electricity, water
replace bra=2 if branche2=="35" & branche2<="39"
***construction
replace bra=3 if branche2>="41" & branche2<="43"
***wholesale and retail trade
replace bra=4 if branche2>="45" & branche2<="47"
***transport
replace bra=5 if branche2>="49" & branche2<="56"
***information and comunication
replace bra=6 if branche2>="58" & branche2<="63"
***financial services & real estate & business service
replace bra=7 if branche2>="64" & branche2<="75"
***other services
replace bra=8 if branche2>="77" & branche2<="97"


gen sales=GF_OMS


drop if sales<0
drop if GF_EGUL<0

replace sales=sales/deflator
replace GF_EGUL=GF_EGUL/deflator
replace  GF_E_GUL= GF_E_GUL/deflator



****merge with firm size: with dataset has been created by globid from: G:\Data\Workdata\703989\STATA FILES\idfi
sort cvrnr year

merge cvrnr year using ${data}firm_size2.dta
keep if _merge==3 

drop _merge

gen tot_emp=fansh

gen size1=(tot_emp>=1 & tot_emp<20)
gen size2=(tot_emp>=20 & tot_emp<50)
gen size3=(tot_emp>=50)


gen productivity=ln(sales/tot_emp)
gen capital_int=.
replace capital_int=ln(GF_EGUL/tot_emp) if year>=1999
replace capital_int=ln(GF_E_GUL/tot_emp) if year<1999




****merge with workforce characteristicts retrived from the matched employer-employee database
sort cvrnr year

merge cvrnr year using ${data}demographic.dta

keep if _merge==3

drop _merge



***MERGE WITH MUNICIPALITY LEVEL SHARE OF NON-EU FOREIGNERS
sort akom year

merge akom year using ${data}for_non_eu.dta

keep if _merge==3

drop _merge

***MERGE WITH ITS INSTRUMENT 

sort akom year

merge akom year using ${data}iv_off5.dta

keep if _merge==3

drop _merge

sum s_hat, det

corr s_hat foreigner_nonEU_mun


***gen intensive and extensive margins of offshoring

replace broad_off=0 if broad_off==.
replace narrow_off=0 if narrow_off==.

gen ext_broad_off=(broad_off>0)
gen ext_narrow_off=(narrow_off>0)

gen int_broad_off=ln(broad_off)
gen int_narrow_off=ln(narrow_off)

****summary stats
drop if branche2=="."
drop if year<1995
drop if year==2012
drop if tot_emp<2
drop if productivity==.
drop if capital_int==.
drop if bra==2


sort cvrnr year
merge cvrnr year using ${data}desti_will.dta
keep if _merge==1 | _merge==3
drop _merge

replace destinations=0 if destinations==.

***SPECIFIC STATS ON OFFSHORING

gen man=bra==1

bysort cvrnr year: gen id=_n
keep if id==1

xtset cvrnr year


gen foreign_o=l.foreign
drop foreign
rename foreign_o foreign
gen prod=l.productivity
gen cap=l.capital_int
gen for=l.foreigner_nonEU_mun
gen for_iv=l.s_hat


gen region=0
replace region=1 if akom>=101 & akom<=250
replace region=1 if akom==260 | akom==270 | akom==400
replace region=2 if akom==253 | akom==259 | akom==265 | akom==269
replace region=2 if akom>=306 & akom<=390
replace region=3 if akom>=410 & akom<=580
replace region=3 if akom==607  | akom==621 | akom==630
replace region=4 if akom>=615 & akom<=766 
replace region=4 if akom==779 | akom==791
replace region=5 if akom==773 | akom==787
replace region=5 if akom>=810 & akom<=860


sort region year

merge region year using ${data}housing_price.dta
keep if _merge==3
drop _merge


gen manu=(bra==1)

bysort cvrnr: egen manu_max=max(manu)


gen akomc=akom*year
destring branche2, replace



tsset cvrnr year
local cntrl "years_of_edu male age  work_exp tenure multi size1 size2 size3"
foreach var in `cntrl'{
gen `var'_lag=l.`var'
drop `var' 
rename `var'_lag `var'
}



******add immigration variable  in levels

sort akom year
merge akom year using ${data}for_non_eu_count.dta
keep if _merge==3

drop _merge

sort akom year
merge akom year using ${data}iv_off_count.dta
keep if _merge==3



global cntrl0 "prod  foreign multi size1 size2 size3"
global cntrl1 "prod cap  foreign multi size1 size2 size3"
global cntrl2 "prod cap years_of_edu male age  work_exp tenure  foreign multi size1 size2 size3"
global cntrl4 "prod cap foreign"



tsset cvrnr year

gen for2=l.foreigner_nonEU_mun_count
gen for2_iv=l.F_hat_no_eu

*****EXTENSIVE MARGINS



xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 cvrnr) cluster(akom) ffirst stages(first)
estimates store m4
meany ext_narrow_off m4
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


esttab m4  using ${tables}Table10_3.txt, replace style(tab)  cells(b(star fmt(%9.6f)) se(par fmt(%9.6f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for )
eststo clear


restore



****SUBSAMPLES OF INDUSTRIES


****SUBSAMPLES OF INDUSTRIES


****SUBSAMPLES OF INDUSTRIES

preserve
keep if manu_max==1
xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv), absorb(year branche2 cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1   using ${tables}Table4_man.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

xi: reghdfe for for_iv $cntrl2, absorb(year branche2  cvrnr) cluster(akom)  
estimates store m1

esttab m1  using ${tables}Table4_man.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for_iv)
eststo clear
restore

preserve
drop  if branche2=="45" | branche2=="46" | branche2=="47"
xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv), absorb(year branche2 akom cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1   using ${tables}Table4_no_retail.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

restore





 egen mean_cap=mean(capital_int)
bysort branche2:  egen mean_cap2=mean(capital_int) if year==2003
replace mean_cap2=0 if mean_cap2==.
bysort branche2:  egen max_cap2=max(mean_cap2) 



gen lab_int_ind=0
replace lab_int_ind=1 if max_cap2<mean_cap




forvalues i=0(1)1{


xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) if lab_int_ind==`i' , absorb(year branche2 akom cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


esttab m1   using ${tables}Table4_labint`i'.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear



}



***industries with high and low offshoring

drop lab_int_ind mean_cap mean_cap2 max_cap2

egen mean_cap=mean(narrow_off)
bysort branche2:  egen mean_cap2=mean(narrow_off) if year==2003
replace mean_cap2=0 if mean_cap2==.
bysort branche2:  egen max_cap2=max(mean_cap2) 

gen lab_int_ind=0
replace lab_int_ind=1 if max_cap2>mean_cap


forvalues i=0(1)1{
xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) if lab_int_ind==`i' , absorb(year branche2 akom cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1  using ${tables}Table4_highoff`i'.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear




}



*****intensive margin (only offshoring firms all years)
bysort cvrnr: egen total_off=total(ext_narrow_off)
bysort cvrnr: gen total_years=_N

gen alw_off=total_off==total_years

xi: reghdfe int_narrow_off $cntrl2 (for=for_iv) if alw_off==1, absorb(year branche2 cvrnr) cluster(akom) 
estimates store m1
meany int_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1 using ${tables}Table9_5.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear



****different definition of intensive margin
preserve

drop int_narrow_off
gen int_narrow_off=ln((narrow_off/tot_emp))

xi: reghdfe int_narrow_off $cntrl2 (for=for_iv), absorb(year branche2 cvrnr) cluster(akom) 
estimates store m1
meany int_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1 using ${tables}Table9_4.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

restore

*******ESPERIAN FDI-BASED DEFINITION OF OFFSHORING

*******ESPERIAN FDI-BASED DEFINITION OF OFFSHORING

*******ESPERIAN FDI-BASED DEFINITION OF OFFSHORING

*******ESPERIAN FDI-BASED DEFINITION OF OFFSHORING
drop _merge

preserve

sort cvrnr year

merge cvrnr year using ${data}esperian_aggr.dta
drop if _merge==2
replace offshoring_aggr=0 if offshoring_aggr==.
replace offshoring_aggr=. if ext_narrow_off==1 & _merge==1


xi: reghdfe offshoring_aggr $cntrl2 (for=for_iv)  , absorb(year branche2 cvrnr) cluster(akom) 
estimates store m1
meany offshoring_aggr m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


esttab m1 using ${tables}Table9_3.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear



restore


*******ALTERNATIVE DEFINITION: BROAD OFFSHORING

xi: reghdfe ext_broad_off $cntrl2 (for=for_iv) , absorb(year branche2  cvrnr) cluster(akom) 
estimates store m1
meany ext_broad_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1   using ${tables}Table9_1.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear



***PLACEBO TEST (IMPORTS OF GOODS NOT PRODUCED NOR EXPORTED)
preserve

drop ext_narrow_off
gen ext_narrow_off=(narrow_off2>0 & narrow_off2!=.)
replace ext_narrow_off=0 if narrow_off2==.

xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 akom cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


esttab m1   using ${tables}Table9_2.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

restore




*****NO COPENHAGEN
preserve

drop if akom==101

xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 akom cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1   using ${tables}Table10_1.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

restore

*****NO MULTI-ESTABLISHMENTS

preserve

drop if multi==1

xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 akom cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1   using ${tables}Table10_2.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

restore

***stats on multi-establishment firms
bysort year: egen total_emp=total(tot_emp)
gen tot_emp2=tot_emp
replace tot_emp2=0 if multi==0
bysort year: egen total_emp_multi=total(tot_emp2) 

gen share_emp=total_emp_multi/total_emp

bysort year: egen total_sales=total(sales)
gen tot_sales2=sales
replace tot_sales2=0 if multi==0
bysort year: egen total_sales_multi=total(tot_sales2) 

gen share_sales=total_sales_multi/total_sales

bysort year: egen total_import=total(broad_off)
gen tot_import2=broad_off
replace tot_import2=0 if multi==0
bysort year: egen total_import_multi=total(tot_import2) 

gen share_import=total_import_multi/total_import


****DIFFERENT DEFINITIONS OF MIGRANT WORKERS
****DIFFERENT DEFINITIONS OF MIGRANT WORKERS
****DIFFERENT DEFINITIONS OF MIGRANT WORKERS


****DIFFERENT DEFINITIONS OF MIGRANT WORKERS
****DIFFERENT DEFINITIONS OF MIGRANT WORKERS
****DIFFERENT DEFINITIONS OF MIGRANT WORKERS

***ALL MIGRANT WORKERS (EU/NON-EU) INSTRUMENTED WITH NON-EU WORKERS (1)

preserve
drop for
tsset cvrnr year
gen for=l.foreigner_tot_mun


xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


esttab m1   using ${tables}Table8_1.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear


restore

***NEW EU/REFUGEE IMMIGRANTS INSTRUMENTED WITH REFUGEES/NEW EU WORKERS (2)

preserve
drop for
drop s_hat for_iv

sort akom year
merge akom year using ${data}iv_off_refneu.dta
keep if _merge==3
drop _merge
tsset cvrnr year
gen for_iv=l.s_hat

sort akom year
merge akom year using  ${data}for_non_eu_ref.dta
keep if _merge==3
drop _merge
tsset cvrnr year
gen for=l.foreigner_nonEU_mun



xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2  cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace


esttab m1   using ${tables}Table8_3.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

restore



***REFUGEE IMMIGRANTS INSTRUMENTED WITH REFUGEES WORKERS (2)

preserve
drop for
drop s_hat for_iv
drop  foreigner_nonEU_mun

sort akom year
merge akom year using ${data}iv_off_refneu2.dta, keep(s_hat)
keep if _merge==3
drop _merge
tsset cvrnr year
gen for_iv=l.s_hat

sort akom year
merge akom year using  ${data}for_non_eu_ref2.dta
keep if _merge==3
drop _merge
tsset cvrnr year
gen for=l.foreigner_nonEU_mun


xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2  cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1  using ${tables}Table8_4.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

restore


***ALL MIGRANT LOW SKILLED WORKERS (EU/NON-EU) INSTRUMENTED WITH NON EU LOW SKILLED WORKERS (4)

preserve
drop for
tsset cvrnr year
gen for=l.foreigner_tot_ls_1_mun

drop s_hat for_iv

sort akom year
merge akom year using ${data}iv_off_ls5.dta
keep if _merge==3
drop _merge
tsset cvrnr year
gen for_iv=l.s_hat


xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace



esttab m1  using ${tables}Table8_5.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear


restore


*******FIRM LEVEL SHARE OF NON EU MIGRANT WORKERS
preserve
drop for 
xtset cvrnr year
gen for=l.foreigner_nonEU



xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 cvrnr) cluster(akom) 
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace




esttab m1  using ${tables}Table8_6.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear

restore


****MECHANISM: check that firms that ever offshore hire more migrant workers

bysort cvrnr: egen max_off=max(ext_narrow_off)


xi: reghdfe foreigner_nonEU $cntrl2 (for=for_iv) if max_off==1, absorb(year branche2  cvrnr) cluster(akom)  ffirst stages(first)
estimates store m1
meany foreigner_nonEU m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

xi: reghdfe foreigner_nonEU $cntrl2 (for=for_iv) if max_off==0, absorb(year branche2  cvrnr) cluster(akom) ffirst stages(first)
estimates store m2
meany foreigner_nonEU m2
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace



esttab m1 m2 using ${tables}Table6_1_2.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear


*****EXCLUDE NEW EU FROM BOTH OFFSHORING AND IMMIGRATION

drop for for_iv ext_narrow_off _merge

sort akom year
merge akom year using ${data}iv_off_no_neu.dta
keep if _merge==3
drop _merge

tsset cvrnr year
gen for_iv=l.s_hat

sort akom year
merge akom year using ${data}for_non_eu_nneu.dta
keep if _merge==3
drop _merge

tsset cvrnr year
gen for=l.foreigner_nonEU_mun

sort cvrnr year
merge cvrnr year using ${data}import_ind_d_no_neu.dta
drop if _merge==2
drop _merge

replace narrow_off=0 if narrow_off==.

gen ext_narrow_off=(narrow_off>0)


xi: reghdfe ext_narrow_off $cntrl2 (for=for_iv) , absorb(year branche2 cvrnr) cluster(akom) ffirst stages(first)
estimates store m1
meany ext_narrow_off m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

esttab m1  using ${tables}Table4_no_new_eu.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for)
eststo clear





******PRE-TREND TEST

******PRE-TREND TEST

******PRE-TREND TEST

******PRE-TREND TEST




******PRE-TREND TEST

******PRE-TREND TEST

******PRE-TREND TEST

******PRE-TREND TEST




******PRE-TREND TEST

******PRE-TREND TEST

******PRE-TREND TEST

******PRE-TREND TEST



******PRE-TREND TEST

******PRE-TREND TEST

******PRE-TREND TEST

******PRE-TREND TEST


use "G:\Data\Workdata\703989\STATA FILES\uhdi.dta", clear

rename aar year

keep if year>=1993 & year<=1995

keep if ie==2

gen hs4=substr(vare,1,4)

replace vrd=0 if vrd==.

bysort cvrnr hs4 year: egen total_export=total(vrd)

collapse total_export, by(cvrnr hs4 year)

sort cvrnr hs4 year

save ${data}total_export_pre.dta, replace

use "G:\Data\Workdata\703989\STATA FILES\uhdi.dta", clear

rename aar year

keep if year>=1995 & year<=2011

keep if ie==2

gen hs4=substr(vare,1,4)

replace vrd=0 if vrd==.

bysort cvrnr hs4 year: egen total_export=total(vrd)

collapse total_export, by(cvrnr hs4 year)

sort cvrnr hs4 year

save ${data}total_export_post.dta, replace

use "G:\Data\Workdata\703989\STATA FILES\uhdi.dta", clear

rename aar year

keep if year>=1993 & year<=1995

keep if ie==1

gen hs4=substr(vare,1,4)

replace vrd=0 if vrd==.

bysort cvrnr hs4 year: egen total_import=total(vrd)

collapse total_import, by(cvrnr hs4 year)

sort cvrnr hs4 year

merge cvrnr hs4 year using ${data}total_export_pre.dta

keep if _merge==3

bysort cvrnr year: egen narrow_offshoring=total(total_import)

collapse narrow_offshoring, by(cvrnr year)

sort cvrnr year

****retrive info on akom

sort cvrnr year

merge cvrnr year using ${data}akom.dta 

keep if year>=1993 & year<=1995

gen narrow_offshoring_ext=(_merge==3 & narrow_offshoring>0)

replace narrow_offshoring_ext=0 if narrow_offshoring_ext==.
replace narrow_offshoring=0 if narrow_offshoring==.


drop _merge 

bysort cvrnr year: gen id=_n

keep if id==1


recode akom (171=240) (181=230) (189=190) (205=230) (207=190) (208=210) (209=250) (211=260) (213=270) (215=270) (221=260) (225=250) /*
*/ (227=210)  (229=250) (231=219)  (235=240) (237=240) (251=350) (255=265) (257=350) (261=350) /*
*/ (263=265) (267=259) (271=336) (301=326) (303=340) (305=306) (307=370) (309=326) (311=330) (313=320) (315=316) (317=326) (319=326) /*
*/ (321=316) (323=326) (325=330) (327=306) (331=330) (333=330) (335=340) (337=340) (339=316)  (341=316) (343=306) /*
*/ (345=316) (351=320) (353=370) (355=360) (357=370) (359=360) (361=390) (363=360) (365=390) (367=360) (369=376) /*
*/ (371=376) (373=370) (375=376) (377=390) (379=360) (381=360) (383=360) (385=320) (387=376) (389=336) (391=376) (393=370) (395=376) /*
*/ (397=390) (401=400) (403=400) (405=400) (407=400) (409=400) (421=420) (423=480) (425=430) (427=479) (429=410) (431=430) (433=420) (435=479) (433=420) (435=479) (437=420) (439=440) /*
*/ (441=440) (443=492) (445=410) (447=440) (449=450) (451=410) (471=480) (473=430) (475=482) (477=430) (481=482) (483=480) (485=420) /*
*/ (487=482) (489=450) (491=420) (493=492) (495=450) (497=430) (499=420) (501=540) (503=580) (505=550) (507=540)  /*
*/ (511=510) (513=540) (515=510)  (517=550) (519=580) (521=550) (523=540) (527=575) (529=580) /*
*/ (531=550) (533=540) (535=540) (537=540) (539=580) (541=550) (543=510) (545=580) (551=530) (553=573) (555=573) (557=561) (559=575) (565=530) /*
*/ (567=561) (567=573) (569=575) (571=561) (577=573)  (603=630)  (609=615) (611=630) (613=766) (617=630) /*
*/ (619=766) (623=621) (625=756)  (629=621) (631=630) (651=657) (653=756) (655=760) (657=760) (659=760) (663=756) (667=760) (669=760) /*
*/ (673=665) (675=671) (677=657) (679=661) (681=760) (683=661) (685=657) (701=706) (703=746) (705=740) (709=710) (711=710) (713=710) (715=746) /*
*/  (721=706) (723=730) (725=707) (729=730) (731=730) (733=706) (735=707) (737=746) (739=706) (743=740) /*
*/ (745=746)  (749=740) (761=791) (763=791) (765=787) (767=710) (769=791) (771=740) (775=791) (777=779) (781=779) /*
*/ (783=779) (785=787)  (789=791)  (801=846) (803=849) (805=810) (807=810) (809=820) (811=849) (815=846) (817=851) /*
*/ (819=860) (821=860) (823=846) (827=820) (829=860) (831=851)  (835=849) (837=851) (839=860)  (841=813) (843=840) (845=840) (847=813) /*
*/ (861=820)

**** 11 old municipalities (233, 509, 525, 601, 605, 627, 717, 719, 747, 793, 833)
***** were split into more municipalities: recode by exploiting the fact that dataset firm contains from 1998 on both the old and the new municipality kode, named
*****respectively GF_KOM_KODE and GF_KOM_KODE_2


destring cvrnr, replace
sort cvrnr year 
merge cvrnr year using ${data}akom2.dta 
drop _merge

bysort cvrnr: egen kom_ny=min( GF_KOM_KODE_2)

replace akom=kom_ny if (akom==233 | akom== 509 | akom==525 | akom==601 | akom== 605 | akom==627 | akom==717 | akom== 719 | akom==747 | akom==793 | akom== 833)


recode akom (147=101)
***drop those firms for which the municipality code cannot be identified

drop if akom==.

bysort akom year: egen narrow_offshoring_int=total(narrow_offshoring)

replace narrow_offshoring_int=ln(narrow_offshoring_int)

collapse narrow_offshoring_ext narrow_offshoring_int, by(akom year)

gen narrow_offshoring_ext_95=narrow_offshoring_ext if year==1995
replace narrow_offshoring_ext_95=0 if  narrow_offshoring_ext_95==.
bysort akom: egen narrow_ext_95=max(narrow_offshoring_ext_95)


gen narrow_offshoring_ext_93=narrow_offshoring_ext if year==1993
replace narrow_offshoring_ext_93=0 if  narrow_offshoring_ext_93==.
bysort akom: egen narrow_ext_93=max(narrow_offshoring_ext_93)


gen narrow_offshoring_int_95=narrow_offshoring_int if year==1995
replace narrow_offshoring_int_95=0 if  narrow_offshoring_int_95==.
bysort akom: egen narrow_int_95=max(narrow_offshoring_int_95)
gen narrow_offshoring_int_93=narrow_offshoring_int if year==1993
replace narrow_offshoring_int_93=0 if  narrow_offshoring_int_93==.
bysort akom: egen narrow_int_93=max(narrow_offshoring_int_93) 

gen change_ext=(narrow_ext_95-narrow_ext_93)

gen change_int=narrow_int_95-narrow_int_93

collapse change_ext change_int, by(akom)

preserve
use "G:\Data\Workdata\703989\STATA FILES\uhdi.dta", clear

rename aar year

keep if year>=1995 & year<=2011

keep if ie==1

gen hs4=substr(vare,1,4)

replace vrd=0 if vrd==.

bysort cvrnr hs4 year: egen total_import=total(vrd)

collapse total_import, by(cvrnr hs4 year)

sort cvrnr hs4 year

merge cvrnr hs4 year using ${data}total_export_post.dta

keep if _merge==3

bysort cvrnr year: egen narrow_offshoring=total(total_import)

collapse narrow_offshoring, by(cvrnr year)

sort cvrnr year

****retrive info on akom

sort cvrnr year

merge cvrnr year using ${data}akom.dta 

keep if year>=1995 & year<=2011

gen narrow_offshoring_ext=(_merge==3 & narrow_offshoring>0)

replace narrow_offshoring_ext=0 if narrow_offshoring_ext==.
replace narrow_offshoring=0 if narrow_offshoring==.


drop _merge 

bysort cvrnr year: gen id=_n

keep if id==1


recode akom (171=240) (181=230) (189=190) (205=230) (207=190) (208=210) (209=250) (211=260) (213=270) (215=270) (221=260) (225=250) /*
*/ (227=210)  (229=250) (231=219)  (235=240) (237=240) (251=350) (255=265) (257=350) (261=350) /*
*/ (263=265) (267=259) (271=336) (301=326) (303=340) (305=306) (307=370) (309=326) (311=330) (313=320) (315=316) (317=326) (319=326) /*
*/ (321=316) (323=326) (325=330) (327=306) (331=330) (333=330) (335=340) (337=340) (339=316)  (341=316) (343=306) /*
*/ (345=316) (351=320) (353=370) (355=360) (357=370) (359=360) (361=390) (363=360) (365=390) (367=360) (369=376) /*
*/ (371=376) (373=370) (375=376) (377=390) (379=360) (381=360) (383=360) (385=320) (387=376) (389=336) (391=376) (393=370) (395=376) /*
*/ (397=390) (401=400) (403=400) (405=400) (407=400) (409=400) (421=420) (423=480) (425=430) (427=479) (429=410) (431=430) (433=420) (435=479) (433=420) (435=479) (437=420) (439=440) /*
*/ (441=440) (443=492) (445=410) (447=440) (449=450) (451=410) (471=480) (473=430) (475=482) (477=430) (481=482) (483=480) (485=420) /*
*/ (487=482) (489=450) (491=420) (493=492) (495=450) (497=430) (499=420) (501=540) (503=580) (505=550) (507=540)  /*
*/ (511=510) (513=540) (515=510)  (517=550) (519=580) (521=550) (523=540) (527=575) (529=580) /*
*/ (531=550) (533=540) (535=540) (537=540) (539=580) (541=550) (543=510) (545=580) (551=530) (553=573) (555=573) (557=561) (559=575) (565=530) /*
*/ (567=561) (567=573) (569=575) (571=561) (577=573)  (603=630)  (609=615) (611=630) (613=766) (617=630) /*
*/ (619=766) (623=621) (625=756)  (629=621) (631=630) (651=657) (653=756) (655=760) (657=760) (659=760) (663=756) (667=760) (669=760) /*
*/ (673=665) (675=671) (677=657) (679=661) (681=760) (683=661) (685=657) (701=706) (703=746) (705=740) (709=710) (711=710) (713=710) (715=746) /*
*/  (721=706) (723=730) (725=707) (729=730) (731=730) (733=706) (735=707) (737=746) (739=706) (743=740) /*
*/ (745=746)  (749=740) (761=791) (763=791) (765=787) (767=710) (769=791) (771=740) (775=791) (777=779) (781=779) /*
*/ (783=779) (785=787)  (789=791)  (801=846) (803=849) (805=810) (807=810) (809=820) (811=849) (815=846) (817=851) /*
*/ (819=860) (821=860) (823=846) (827=820) (829=860) (831=851)  (835=849) (837=851) (839=860)  (841=813) (843=840) (845=840) (847=813) /*
*/ (861=820)

**** 11 old municipalities (233, 509, 525, 601, 605, 627, 717, 719, 747, 793, 833)
***** were split into more municipalities: recode by exploiting the fact that dataset firm contains from 1998 on both the old and the new municipality kode, named
*****respectively GF_KOM_KODE and GF_KOM_KODE_2


destring cvrnr, replace
sort cvrnr year 
merge cvrnr year using ${data}akom2.dta 
drop _merge

bysort cvrnr: egen kom_ny=min( GF_KOM_KODE_2)

replace akom=kom_ny if (akom==233 | akom== 509 | akom==525 | akom==601 | akom== 605 | akom==627 | akom==717 | akom== 719 | akom==747 | akom==793 | akom== 833)


recode akom (147=101)
***drop those firms for which the municipality code cannot be identified

drop if akom==.


bysort akom year: egen narrow_offshoring_int=total(narrow_offshoring)

replace narrow_offshoring_int=ln(narrow_offshoring_int)

collapse narrow_offshoring_ext narrow_offshoring_int, by(akom year)

gen narrow_offshoring_ext_11=narrow_offshoring_ext if year==2011
replace narrow_offshoring_ext_11=0 if  narrow_offshoring_ext_11==.
bysort akom: egen narrow_ext_11=max(narrow_offshoring_ext_11)

gen narrow_offshoring_int_11=narrow_offshoring_int if year==2011
replace narrow_offshoring_int_11=0 if  narrow_offshoring_int_11==.
bysort akom: egen narrow_int_11=max(narrow_offshoring_int_11)


gen narrow_offshoring_ext_95=narrow_offshoring_ext if year==1995
replace narrow_offshoring_ext_95=0 if  narrow_offshoring_ext_95==.
bysort akom: egen narrow_ext_95=max(narrow_offshoring_ext_95)

gen narrow_offshoring_int_95=narrow_offshoring_int if year==1995
replace narrow_offshoring_int_95=0 if  narrow_offshoring_int_95==.
bysort akom: egen narrow_int_95=max(narrow_offshoring_int_95)


gen narrow_offshoring_ext_01=narrow_offshoring_ext if year==2001
replace narrow_offshoring_ext_01=0 if  narrow_offshoring_ext_01==.
bysort akom: egen narrow_ext_01=max(narrow_offshoring_ext_01)

gen narrow_offshoring_int_01=narrow_offshoring_int if year==2001
replace narrow_offshoring_int_01=0 if  narrow_offshoring_int_01==.
bysort akom: egen narrow_int_01=max(narrow_offshoring_int_01)



gen narrow_offshoring_ext_07=narrow_offshoring_ext if year==2007
replace narrow_offshoring_ext_07=0 if  narrow_offshoring_ext_07==.
bysort akom: egen narrow_ext_07=max(narrow_offshoring_ext_07)

gen narrow_offshoring_int_07=narrow_offshoring_int if year==2007
replace narrow_offshoring_int_07=0 if  narrow_offshoring_int_07==.
bysort akom: egen narrow_int_07=max(narrow_offshoring_int_07)


gen change_ext2=(narrow_ext_11-narrow_ext_95)

gen change_int2=narrow_int_11-narrow_int_95

gen change_ext2b=(narrow_ext_01-narrow_ext_95)

gen change_int2b=narrow_int_01-narrow_int_95

gen change_ext2c=(narrow_ext_07-narrow_ext_01)

gen change_int2c=narrow_int_07-narrow_int_01

gen change_ext2d=(narrow_ext_11-narrow_ext_07)

gen change_int2d=narrow_int_11-narrow_int_07

collapse change_ext2  change_int2 change_ext2b  change_int2b change_ext2c  change_int2c change_ext2d  change_int2d, by(akom)

sum change_ext2  change_int2

sort akom 

save ${data}akom_2005_2011.dta, replace

restore

preserve

use ${data}offshoring_fin_will.dta, clear

keep if year==1995

rename total_emp_akom total_emp

collapse productivity capital_int  years_of_edu  male age  work_exp tenure  foreign multi size1 size2 size3 total_emp, by(akom)

sort akom

save ${data}akom_char_1995.dta, replace

restore

preserve

use ${data}pre_wage.dta, clear

gen tot_emp_1995=tot_emp if year==1995
replace tot_emp_1995=0 if  tot_emp_1995==.
bysort akom: egen tot_emp_95=max(tot_emp_1995)

gen tot_emp_1993=tot_emp if year==1993
replace tot_emp_1993=0 if  tot_emp_1993==.
bysort akom: egen tot_emp_93=max(tot_emp_1993)

gen change_emp=(tot_emp_95-tot_emp_93)

gen wage_1995=lonind if year==1995
replace wage_1995=0 if  wage_1995==.
bysort akom: egen wage_95=max(wage_1995)

gen wage_1993=lonind if year==1993
replace wage_1993=0 if  wage_1993==.
bysort akom: egen wage_93=max(wage_1993)

gen change_wage=(wage_95-wage_93)

collapse change_emp change_wage, by(akom)

save ${data}pre_wage_akom.dta, replace

restore


preserve

use ${data}offshoring_fin_will.dta, clear


sort akom year
merge akom year using ${data}for_non_eu_count.dta
keep if _merge==3

drop _merge

sort akom year
merge akom year using ${data}iv_off_count.dta
keep if _merge==3

 

drop for_iv for
rename s_hat for_iv


replace foreigner_nonEU_mun_count=ln(foreigner_nonEU_mun_count+1)
replace F_hat_no_eu=ln(F_hat_no_eu+1)

rename  foreigner_nonEU_mun for
rename  foreigner_EU_mun for_eu
rename  foreigner_nonEU_mun_count for_count
rename F_hat_no_eu for_iv_count


gen for_iv_1995=for_iv if year==1995
replace for_iv_1995=0 if  for_iv_1995==.
bysort akom: egen fore_iv_1995=max(for_iv_1995)

gen for_iv_2011=for_iv if year==2011
replace for_iv_2011=0 if  for_iv_2011==.
bysort akom: egen fore_iv_2011=max(for_iv_2011)

gen change_iv_1995_2011=(fore_iv_2011-fore_iv_1995)


gen for_iv_2001=for_iv if year==2001
replace for_iv_2001=0 if  for_iv_2001==.
bysort akom: egen fore_iv_2001=max(for_iv_2001)


gen change_iv_2001_1995=(fore_iv_2001-fore_iv_1995)

gen for_iv_2007=for_iv if year==2007
replace for_iv_2007=0 if  for_iv_2007==.
bysort akom: egen fore_iv_2007=max(for_iv_2007)


gen change_iv_2007_2001=(fore_iv_2007-fore_iv_2001)

gen change_iv_2011_2007=(fore_iv_2011-fore_iv_2007)




gen for_iv_1995_count=for_iv_count if year==1995
replace for_iv_1995_count=0 if  for_iv_1995_count==.
bysort akom: egen fore_iv_1995_count=max(for_iv_1995_count)

gen for_iv_2011_count=for_iv_count if year==2011
replace for_iv_2011_count=0 if  for_iv_2011_count==.
bysort akom: egen fore_iv_2011_count=max(for_iv_2011_count)

gen change_iv_1995_2011_count=fore_iv_2011_count-fore_iv_1995_count



gen for_1995=for if year==1995
replace for_1995=0 if  for_1995==.
bysort akom: egen fore_1995=max(for_1995)

gen for_2011=for if year==2011
replace for_2011=0 if  for_2011==.
bysort akom: egen fore_2011=max(for_2011)

gen change_1995_2011=(fore_2011-fore_1995)


gen for_1995_count=for_count if year==1995
replace for_1995_count=0 if  for_1995_count==.
bysort akom: egen fore_1995_count=max(for_1995_count)

gen for_2011_count=for_count if year==2011
replace for_2011_count=0 if  for_2011_count==.
bysort akom: egen fore_2011_count=max(for_2011_count)

gen change_1995_2011_count=(fore_2011_count-fore_1995_count)


drop for_1995 fore_1995 for_2011 fore_2011

gen for_1995=for_eu if year==1995
replace for_1995=0 if  for_1995==.
bysort akom: egen fore_1995=max(for_1995)

gen for_2011=for_eu if year==2011
replace for_2011=0 if  for_2011==.
bysort akom: egen fore_2011=max(for_2011)

gen change_1995_2011_eu=(fore_2011-fore_1995)

collapse change_iv_1995_2011 change_iv_2001_1995 change_iv_2007_2001 change_iv_2011_2007 change_1995_2011 change_1995_2011_eu change_iv_1995_2011_count change_1995_2011_count, by(akom)

sort akom

save ${data}akom_iv_1995_2011.dta, replace

restore




sort akom 
merge akom using ${data}pre_wage_akom.dta
keep if _merge==3
drop _merge


sort akom 
merge akom using ${data}akom_char_1995.dta
keep if _merge==3
drop _merge


sort akom 
merge akom using ${data}akom_iv_2000_2011.dta
keep if _merge==3
drop _merge

sort akom 
merge akom using ${data}akom_iv_2005_2011.dta
keep if _merge==3
drop _merge


sort akom 
merge akom using ${data}akom_iv_1995_2011.dta
keep if _merge==3
drop _merge


sort akom 
merge akom using ${data}akom_2005_2011.dta
keep if _merge==3
drop _merge


rename productivity prod

rename capital_int cap

sort akom 
merge akom using ${data}inflow.dta
keep if _merge==3
drop _merge


sort akom 
merge akom using ${data}outflow_g.dta
keep if _merge==3
drop _merge



sort akom 
merge akom using ${data}wage_g.dta
keep if _merge==3
drop _merge


xi: reg change_ext change_iv_1995_2011 prod cap years_of_edu male age work_exp tenure multi size1 size2 size3  [pw=total_emp], robust 
eststo
xi: reg change_int change_iv_1995_2011 prod cap years_of_edu male age work_exp tenure  multi size1 size2 size3  [pw=total_emp], robust  
eststo
xi: reg change_emp change_iv_1995_2011 prod cap years_of_edu male age work_exp tenure multi size1 size2 size3  [pw=total_emp], robust 
eststo
xi: reg change_wage change_iv_1995_2011 prod cap years_of_edu male age work_exp tenure  multi size1 size2 size3  [pw=total_emp], robust  
eststo
xi: reg change_1995_2011_eu change_iv_1995_2011 prod cap years_of_edu male age work_exp tenure multi size1 size2 size3  [pw=total_emp], robust 
eststo
xi: reg change_1995_2011 change_iv_1995_2011 prod cap years_of_edu male age work_exp tenure multi size1 size2 size3  [pw=total_emp], robust 
eststo
xi: reg change_ext2 change_iv_1995_2011 prod cap years_of_edu male age work_exp tenure multi size1 size2 size3  [pw=total_emp], robust 
eststo
xi: reg change_int2 change_iv_1995_2011 prod cap years_of_edu male age work_exp tenure  multi size1 size2 size3  [pw=total_emp], robust  
eststo


esttab using ${tables}Table3.txt, replace cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep(change_iv_1995_2011) r2
eststo clear


xi: reg change_ext change_iv_2001_1995 prod cap years_of_edu male age work_exp tenure multi size1 size2 size3  [pw=total_emp], robust 
eststo
xi: reg change_int change_iv_2001_1995 prod cap years_of_edu male age work_exp tenure  multi size1 size2 size3  [pw=total_emp], robust  
eststo
xi: reg change_ext change_iv_2007_2001 prod cap years_of_edu male age work_exp tenure multi size1 size2 size3  [pw=total_emp], robust 
eststo
xi: reg change_int change_iv_2007_2001 prod cap years_of_edu male age work_exp tenure  multi size1 size2 size3  [pw=total_emp], robust  
eststo
xi: reg change_ext change_iv_2011_2007 prod cap years_of_edu male age work_exp tenure multi size1 size2 size3  [pw=total_emp], robust 
eststo
xi: reg change_int change_iv_2011_2007 prod cap years_of_edu male age work_exp tenure  multi size1 size2 size3  [pw=total_emp], robust  
eststo


esttab using ${tables}Table3_robustness.txt, replace cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep(change_iv_2001_1995 change_iv_2007_2001 change_iv_2011_2007) r2
eststo clear





*****INTENSITY OF ROUTINE OCCUPATIONS AT THE MUNICIPALITY LEVEL

******idapall3a is the matched employer-employee data with a few sample selection criteria
use ${data}idapall3a.dta, clear

destring akom, replace

recode akom (171=240) (181=230) (189=190) (205=230) (207=190) (208=210) (209=250) (211=260) (213=270) (215=270) (221=260) (225=250) /*
*/ (227=210)  (229=250) (231=219)  (235=240) (237=240) (251=350) (255=265) (257=350) (261=350) /*
*/ (263=265) (267=259) (271=336) (301=326) (303=340) (305=306) (307=370) (309=326) (311=330) (313=320) (315=316) (317=326) (319=326) /*
*/ (321=316) (323=326) (325=330) (327=306) (331=330) (333=330) (335=340) (337=340) (339=316)  (341=316) (343=306) /*
*/ (345=316) (351=320) (353=370) (355=360) (357=370) (359=360) (361=390) (363=360) (365=390) (367=360) (369=376) /*
*/ (371=376) (373=370) (375=376) (377=390) (379=360) (381=360) (383=360) (385=320) (387=376) (389=336) (391=376) (393=370) (395=376) /*
*/ (397=390) (401=400) (403=400) (405=400) (407=400) (409=400) (421=420) (423=480) (425=430) (427=479) (429=410) (431=430) (433=420) (435=479) (433=420) (435=479) (437=420) (439=440) /*
*/ (441=440) (443=492) (445=410) (447=440) (449=450) (451=410) (471=480) (473=430) (475=482) (477=430) (481=482) (483=480) (485=420) /*
*/ (487=482) (489=450) (491=420) (493=492) (495=450) (497=430) (499=420) (501=540) (503=580) (505=550) (507=540)  /*
*/ (511=510) (513=540) (515=510)  (517=550) (519=580) (521=550) (523=540) (527=575) (529=580) /*
*/ (531=550) (533=540) (535=540) (537=540) (539=580) (541=550) (543=510) (545=580) (551=530) (553=573) (555=573) (557=561) (559=575) (565=530) /*
*/ (567=561) (567=573) (569=575) (571=561) (577=573)  (603=630)  (609=615) (611=630) (613=766) (617=630) /*
*/ (619=766) (623=621) (625=756)  (629=621) (631=630) (651=657) (653=756) (655=760) (657=760) (659=760) (663=756) (667=760) (669=760) /*
*/ (673=665) (675=671) (677=657) (679=661) (681=760) (683=661) (685=657) (701=706) (703=746) (705=740) (709=710) (711=710) (713=710) (715=746) /*
*/  (721=706) (723=730) (725=707) (729=730) (731=730) (733=706) (735=707) (737=746) (739=706) (743=740) /*
*/ (745=746)  (749=740) (761=791) (763=791) (765=787) (767=710) (769=791) (771=740) (775=791) (777=779) (781=779) /*
*/ (783=779) (785=787)  (789=791)  (801=846) (803=849) (805=810) (807=810) (809=820) (811=849) (815=846) (817=851) /*
*/ (819=860) (821=860) (823=846) (827=820) (829=860) (831=851)  (835=849) (837=851) (839=860)  (841=813) (843=840) (845=840) (847=813) /*
*/ (861=820) if year<2006

**** 11 old municipalities (233, 509, 525, 601, 605, 627, 717, 719, 747, 793, 833)
***** were split into more municipalities: recode by exploiting the fact that dataset firm contains from 1998 on both the old and the new municipality kode, named
*****respectively GF_KOM_KODE and GF_KOM_KODE_2

destring cvrnr, replace
sort cvrnr year 
merge cvrnr year using ${data}akom2.dta 
drop _merge

bysort cvrnr: egen kom_ny=min( GF_KOM_KODE_2)

replace akom=kom_ny if (akom==233 | akom== 509 | akom==525 | akom==601 | akom== 605 | akom==627 | akom==717 | akom== 719 | akom==747 | akom==793 | akom== 833)


recode akom (147=101)

***drop those firms for which the municipality code cannot be identified

drop if akom==.

sort DISCO08_ALLE_INDK 

merge DISCO08_ALLE_INDK using ${data}conversion_table_isco.dta, update replace

keep if _merge==1 | _merge==4

drop _merge

gen disco=substr(DISCOALLE_INDK, 1,4)

compress

sort disco

merge disco using ${data}tasks_disco.dta, keep(imp_56 imp_57 imp_58 imp_59 imp_60 imp_51 imp_52 imp_53 imp_54 imp_55)

keep if _merge==3

drop _merge

pca imp_56 imp_57 imp_58 imp_59 imp_60
predict routine, score

pca imp_51 imp_52 imp_53 imp_54 imp_55
predict non_routine, score


sum routine, det
gen routine_job=(routine>r(p75))


sum non_routine, det
gen non_routine_job=(non_routine>r(p75))

sum routine, det
gen routine_job2=routine>r(p90)

sum routine, det
gen routine_job3=routine>r(p95)

sort pnr year
merge pnr year using ${data}ie_type.dta
keep if _merge==3
drop _merge

rename ie_type  ietype
rename opr_land ieland
***gen two different foreigner dummy: the first one including all nationalities (but DK), the second one excluding EU foreigners
gen natives_tot=ietype=="1"

gen foreigner_tot=(natives_tot!=1)

gen natives=(ietype=="1") | ieland=="5126" | ieland=="5130" | ieland=="5180" | ieland=="5134" | ieland=="5142" | ieland=="5150" | ieland=="5108" | ieland=="5140" | /*
*/ ieland=="5156" | ieland=="5164" | ieland=="5170"  | ieland=="5182" | ieland=="5104" | ieland=="5120" 


sort hfaudd

merge hfaudd using ${data}udd.dta
drop _merge

gen edu=substr(hffsp,1,2)
destring edu, replace

gen skill1=(edu==20 | edu==25 | edu==35| edu==40 | edu==15)
gen skill2=(edu==50 | edu==60 | edu==65 | edu==70)
gen skill3=(skill1==0 & skill2==0) & edu!=.
replace skill3=1 if edu==.

gen years_of_edu=0
replace years_of_edu=6 if edu==10
replace years_of_edu=10 if edu==15 | edu==35
replace years_of_edu=12 if edu==20 | edu==25 | edu==39
replace years_of_edu=14 if edu==40 
replace years_of_edu=15 if edu==50 | edu==60
replace years_of_edu=17 if edu==65
replace years_of_edu=20 if edu==70
replace years_of_edu=6 if edu==.


****several definitions of foreign workers
gen foreigner_nonEU=(natives!=1)
gen foreigner_EU=(ieland=="5126" | ieland=="5130" | ieland=="5180" | ieland=="5134" | ieland=="5142" | ieland=="5150" | ieland=="5108" | ieland=="5140" | /*
*/ ieland=="5156" | ieland=="5164" | ieland=="5170"  | ieland=="5182" | ieland=="5104" | ieland=="5120")

sum years_of_edu if foreigner_nonEU==1
sum years_of_edu if foreigner_EU==1
sum years_of_edu if natives==1

tab routine_job if natives==1
tab routine_job if foreigner_nonEU==1

gen routine_job_for=routine_job==1 & foreigner_nonEU==1
gen routine_job_nat=routine_job==1 & foreigner_nonEU==0


bysort cvrnr year: egen tot_rou_for=total(routine_job_for)
bysort cvrnr year: egen tot_rou_nat=total(routine_job_nat)
bysort cvrnr year: gen tot_emp3=_N

preserve
collapse routine_job tot_rou_for tot_rou_nat tot_emp3, by(cvrnr year)
sort cvrnr year
save ${data}firm_rou.dta, replace
restore

gen routine_job_nat=(routine_job==1 & natives==1)
gen routine_job_for=(routine_job==1 &  foreigner_nonEU==1)
gen non_routine_job_nat=(non_routine_job==1 & natives==1)
gen non_routine_job_for=(non_routine_job==1 &  foreigner_nonEU==1)
gen routine_job_nat2=(routine_job2==1 & natives==1)
gen routine_job_for2=(routine_job2==1 &  foreigner_nonEU==1)
gen routine_job_nat3=(routine_job3==1 & natives==1)
gen routine_job_for3=(routine_job3==1 &  foreigner_nonEU==1)

bysort akom year: egen tot_routine_nat=total(routine_job_nat)
bysort akom year: egen tot_routine_for=total(routine_job_for)
bysort akom year: egen tot_nroutine_nat=total(non_routine_job_nat)
bysort akom year: egen tot_nroutine_for=total(non_routine_job_for)
bysort akom year: egen tot_routine_nat2=total(routine_job_nat2)
bysort akom year: egen tot_routine_for2=total(routine_job_for2)
bysort akom year: egen tot_routine_nat3=total(routine_job_nat2)
bysort akom year: egen tot_routine_for3=total(routine_job_for2)
bysort akom year: gen tot_workers=_N
bysort akom year: egen routine_nat=median(routine) if natives==1
bysort akom year: egen routine_nat_m=min(routine_nat) 
replace routine_nat=routine_nat_m if routine_nat==.

bysort akom year: egen routine_for=median(routine) if foreigner_nonEU==1
bysort akom year: egen routine_for_m=min(routine_for) 
replace routine_for=routine_for_m if routine_for==.


collapse tot_routine_nat tot_routine_for tot_nroutine_nat tot_nroutine_for tot_routine_nat2 tot_routine_for2 tot_routine_nat3 tot_routine_for3 tot_workers routine_nat routine_for (median) routine, by(akom year)

sort akom year

save ${data}tasks_mun.dta, replace





***REPEAT THE SAME ANALYSIS WITH THE SHORT TERM/PARTIAL EQUILIBRIUM APPROACH

use ${data}inflow_akom.dta, clear

sort akom year
merge akom year using ${data}outflow.dta
keep if _merge==3
drop _merge

sort akom year
merge akom year using ${data}wage_natives.dta
keep if _merge==3
drop _merge

rename lonind lonind_natives


sort akom year
merge akom year using ${data}wage_natives_ls.dta
keep if _merge==3
drop _merge

rename lonind lonind_natives_ls


sort akom year
merge akom year using ${data}wage_nonEU_ls.dta
keep if _merge==3
drop _merge

rename lonind lonind_nonEU_ls

sort akom year
merge akom year using ${data}wage_for.dta
keep if _merge==3
drop _merge

sort akom year
merge akom year using ${data}iv_off5.dta
keep if _merge==3
drop _merge

tsset akom year

gen for_lag=l.foreigner_nonEU
gen for_iv=l.s_hat


preserve
use ${data}offshoring_fin_will.dta, clear
rename total_emp_akom total_emp
collapse productivity capital_int  years_of_edu  male age  work_exp tenure  foreign multi size1 size2 size3 total_emp, by(akom year)
sort akom year
save ${data}akom_char.dta, replace
restore


gen tot_firm2=ln(tot_firm+1)
gen inflow2=ln(inflow+1)
gen stock_natives2=ln(stock_natives+1)
gen outflow2=ln(outflow+1)
gen lonind1=ln(lonind)
gen lonind2=ln(lonind_natives)
gen lonind3=ln(lonind_natives_ls)
gen lonind4=(lonind_nonEU)


sort akom year
merge akom year using ${data}akom_char.dta
keep if _merge==3
drop _merge

sort akom year
merge akom year using ${data}tasks_mun.dta
keep if _merge==3
drop _merge


capture program drop meany
program define meany, rclass
args var spec
sum `var' if e(sample), mean
estadd scalar Meany=r(mean): `spec'
end 


reghdfe tot_firm2 productivity capital_int  years_of_edu  male age  work_exp tenure  foreign multi size1 size2 size3 (for_lag=for_iv), absorb(akom year) ffirst stages(first)
estimates store m1
meany tot_firm2 m1
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

reghdfe inflow2 productivity capital_int  years_of_edu  male age  work_exp tenure  foreign multi size1 size2 size3 (for_lag=for_iv), absorb(akom year) ffirst stages(first)
estimates store m2
meany inflow2 m2
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

reghdfe lonind2 productivity capital_int  years_of_edu  male age  work_exp tenure  foreign multi size1 size2 size3 (for_lag=for_iv), absorb(akom year) ffirst stages(first)
estimates store m3
meany lonind1 m3
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace

reghdfe lonind4 productivity capital_int  years_of_edu  male age  work_exp tenure  foreign multi size1 size2 size3 (for_lag=for_iv), absorb(akom year) ffirst stages(first)
estimates store m4
meany lonind1 m4
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace



reghdfe productivity capital_int  years_of_edu  male age  work_exp tenure  foreign multi size1 size2 size3 (for_lag=for_iv), absorb(akom year) ffirst stages(first)
estimates store m5
meany productivity m5
estadd scalar F= e(idstat), replace
estadd scalar p= e(idp), replace



esttab m1  m2 m3 m4 m5   using ${tables}Table6_568910.txt, replace style(tab)  cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) starlevels(* 0.10 ** 0.05 *** 0.01) /*
*/keep( for_lag)

